# -*- coding:utf8 -*-

import importlib, re, sys, json, datetime, random, time
from scrapy.selector import Selector
from scrapy.exceptions import CloseSpider
import urllib
import lxml.html

# Scrapy 1.0 renamed scrapy.spider.BaseSpider to scrapy.spiders.Spider;
# try the modern path first, fall back for very old installations.
try:
    from scrapy.spiders import Spider
except ImportError:  # narrow: only a missing module should trigger the fallback
    from scrapy.spider import BaseSpider as Spider

from scrapy.http import Request, FormRequest
from scrapy.utils.response import get_base_url
from scrapy.utils.url import urljoin_rfc
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from gaokaopai.items import *
from gaokaopai.dao import *

importlib.reload(sys)


class ScoreListSpider(Spider):
    """Crawl the wmzy.com paginated school-list API and yield one item per school.

    ``start_requests`` fans out one GET per listing page; ``parse_list``
    extracts each school's display name and detail-page URL from the
    ``m-sch-name`` anchor elements and yields ``ScrapyUniversityItem``s.
    """

    name = 'school_list'
    # NOTE(review): `allow` is not an attribute Scrapy reads; the offsite
    # middleware uses `allowed_domains`. Original kept for compatibility.
    allow = ['wmzy.com']
    allowed_domains = ['wmzy.com']
    # Number of listing pages to fetch (was hard-coded as range(1, 138)).
    total_pages = 137

    def start_requests(self):
        """Yield one listing-page Request per page, bypassing the dupe filter."""
        # The `_` query parameter is a cache-buster; use the current time in
        # milliseconds instead of the stale hard-coded 2018 timestamp.
        cache_buster = int(time.time() * 1000)
        for page in range(1, self.total_pages + 1):
            self.logger.info('fetching school list page %d', page)
            url = ('https://www.wmzy.com/api/school/getSchList'
                   '?prov_filter=00&type_filter=0&diploma_filter=0'
                   '&flag_filter=0&page=%d&page_len=20&_=%d'
                   % (page, cache_buster))
            yield Request(url, callback=self.parse_list, dont_filter=True)

    def parse_list(self, response):
        """Extract (name, url) for every school anchor on a listing page.

        :param response: Scrapy response for one page of the school-list API.
        :yields: ``ScrapyUniversityItem`` with ``table``, ``name``, ``url``.
        """
        for a_dom in response.xpath("//div[contains(@class,'m-sch-name')]/h3/a"):
            # hrefs arrive wrapped in stray slashes/quotes/backslashes
            # (JSON-escaped fragments); strip whitespace first, then every
            # wrapper character in one pass instead of the chained strips.
            href = ''.join(a_dom.xpath('./@href').extract()).strip().strip('/\\"')
            url = 'https://www.wmzy.com' + href
            # BUG FIX: the original chained .strip('\\n'), which strips the
            # *characters* '\' and 'n' — corrupting names that start or end
            # with them (e.g. "Fudan" -> "Fuda"). Plain .strip() already
            # removes real newline characters.
            name = ''.join(a_dom.xpath('./text()').extract()).strip()
            self.logger.debug('%s => %s', name, url)

            su = ScrapyUniversityItem()
            su['table'] = 'scrapy_university'
            su['name'] = name
            su['url'] = url
            yield su

